# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Location of the PHP manual archive and of the generated keywords file.
PHP_DOCS_URI = "https://www.php.net/distributions/manual/php_manual_en.tar.gz"
PHP_KEYWORDS_FILE = "./lib/rouge/lexers/php/keywords.rb"

namespace :builtins do
  task :php do
    # Download the manual pages, pull the function names out of them, and
    # write the rendered Ruby source to the keywords file.
    builder = Rouge::Tasks::Builtins::PHP.new
    pages   = builder.download_docs(PHP_DOCS_URI).values
    source  = builder.render_output(builder.extract_keywords(pages))

    File.write(PHP_KEYWORDS_FILE, source)
  end
end

require "shellwords"

module Rouge
  module Tasks
    module Builtins
      # Generates the PHP lexer's builtin-function table from the PHP manual.
      #
      # The three public steps are meant to be chained: #download_docs fetches
      # the manual's reference pages, #extract_keywords pulls the function
      # names out of them, and #render_output turns the result into Ruby source.
      class PHP
        # Downloads the manual archive at +input+, unpacks it under /tmp/rouge
        # and reads every `ref.*` page from the `php-chunked-xhtml` directory.
        #
        # @param input [#to_s] URL of the gzipped tar archive of the manual
        # @return [Hash{String => String}] page contents keyed by file basename,
        #   inserted in sorted filename order
        # @raise [RuntimeError] if creating the directory or the
        #   download/unpack pipeline fails
        def download_docs(input)
          files = {}

          run_command "mkdir -p /tmp/rouge"
          Dir.chdir "/tmp/rouge" do
            # The URL is escaped because it is interpolated into a shell
            # command. A failure must abort here: otherwise pages left behind
            # by an earlier run would be read as if they were fresh.
            run_command "wget -qO- #{Shellwords.escape(input.to_s)} | tar -xz"
            Dir.chdir "./php-chunked-xhtml" do
              Dir.glob("./ref.*").sort.each do |f|
                files[File.basename(f)] = File.read(f)
              end
            end
          end

          files
        end

        # Collects function names from the HTML of manual reference pages.
        #
        # Pages whose <title> is not of the form "<Name> Functions" are
        # skipped. A name only appears in the result once at least one
        # function link has been found for it.
        #
        # @param files [Enumerable<String>] HTML contents of the pages
        # @return [Hash{String => Array<String>}] function names keyed by the
        #   title's "<Name>" part, in order of appearance
        def extract_keywords(files)
          keywords = Hash.new { |h, k| h[k] = [] }

          files.each do |file|
            name = file[%r(<title>(.*?) Functions</title>), 1]
            next unless name

            # scan yields a one-element array per match (the single capture).
            file.scan(%r(<a href="function\..*?\.html">([\w\\]+)</a>)) do |match|
              keywords[name] << match.first
            end
          end

          keywords
        end

        # Renders the Ruby source that defines `Rouge::Lexers::PHP.builtins`.
        #
        # With a block, yields the source one line at a time. Without a block,
        # returns the lines joined by "\n" (no trailing newline).
        #
        # @param keywords [Hash{String => Array<String>}] output of
        #   #extract_keywords
        # @return [String] the full source when no block is given
        def render_output(keywords, &b)
          return enum_for(:render_output, keywords).to_a.join("\n") unless b

          yield   "# -*- coding: utf-8 -*- #"
          yield   "# frozen_string_literal: true"
          yield   ""
          yield   "# DO NOT EDIT"
          yield   "# This file is automatically generated by `rake builtins:php`."
          yield   "# See tasks/builtins/php.rake for more info."
          yield   ""
          yield   "module Rouge"
          yield   "  module Lexers"
          yield   "    class PHP"
          yield   "      def self.builtins"
          yield   "        @builtins ||= {}.tap do |b|"
          keywords.each do |n, fs|
            yield "          b[#{n.inspect}] = Set.new #{fs.inspect}"
          end
          yield   "        end"
          yield   "      end"
          yield   "    end"
          yield   "  end"
          yield   "end"
        end

        private

        # Runs +command+ through the shell and raises unless it exits
        # successfully (Kernel#system returns false or nil on failure).
        def run_command(command)
          raise "Command failed: #{command}" unless system(command)
        end
      end
    end
  end
end

# Downloads and unpacks the PHP manual under /tmp/rouge, then yields the
# contents of each `ref.*` page of `php-chunked-xhtml`, in sorted filename
# order.
#
# Without a block, returns an Enumerator over the same page contents.
def php_references(&b)
  return enum_for :php_references unless block_given?

  sh 'mkdir -p /tmp/rouge', { verbose: false }
  Dir.chdir '/tmp/rouge' do
    # Fetch the archive named by PHP_DOCS_URI (HTTPS), the same one the
    # builtins:php task uses, instead of a separate plain-HTTP mirror URL.
    sh "wget -qO- #{PHP_DOCS_URI} | tar -xz", { verbose: false }
    Dir.chdir './php-chunked-xhtml' do
      Dir.glob('./ref.*').sort.each { |x| yield File.read(x) }
    end
  end
end

# Yields a `[name, functions]` pair for every reference page whose <title>
# reads "<name> Functions". `functions` is the raw result of String#scan,
# i.e. an array of one-element arrays holding each function link's text.
#
# Without a block, returns an Enumerator over the same pairs.
def php_functions(&b)
  return enum_for :php_functions unless block_given?

  php_references do |page|
    title = page[%r(<title>(.*?) Functions</title>), 1]

    # Pages that are not function references are skipped.
    next if title.nil?

    yield [title, page.scan(%r(<a href="function\..*?\.html">(.*?)</a>))]
  end
end

# Yields, one line at a time, the Ruby source that defines
# `Rouge::Lexers::PHP.builtins` from the pairs produced by php_functions.
#
# Without a block, returns an Enumerator over the same lines (like
# php_references and php_functions) instead of raising LocalJumpError.
def php_builtins_source
  return enum_for(:php_builtins_source) unless block_given?

  yield   "# -*- coding: utf-8 -*- #"
  yield   "# frozen_string_literal: true"
  yield   ""
  yield   "# automatically generated by `rake builtins:php`"
  yield   "module Rouge"
  yield   "  module Lexers"
  yield   "    class PHP"
  yield   "      def self.builtins"
  yield   "        @builtins ||= {}.tap do |b|"
  php_functions do |n, fs|
    # fs is an array of one-element arrays; Array#join flattens it.
    yield "          b[#{n.inspect}] = Set.new %w(#{fs.join(' ')})"
  end
  yield   "        end"
  yield   "      end"
  yield   "    end"
  yield   "  end"
  yield   "end"
end
